* -----------------------------------------------------------------
* Session set-up: empty the workspace, raise size limits, define the
* path globals and switch off paging of the Results window.
* -----------------------------------------------------------------
clear
capture clear matrix
* NOTE(review): -set mem- is reportedly ignored by Stata 12 and newer
* (memory is managed automatically); kept for older installations.
set mem 2g
set maxvar 10000
set matsize 5000

*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
* SET GLOBAL $PATHS
*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
* root: project folder (must be edited by the user before running)
* data: data folder underneath root
global root "INSERT CUSTOMIZED PATH"
global data "$root\data"
*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

* Interpret the remaining commands with Stata 13 syntax rules
version 13
set more off
set scrollbufsize 50000


* Start from the pl file; pgen and pequiv are attached below
use "$data\raw\SOEP\pl.dta", clear


*********************
*Merge in other data*
*********************

* pgen: one row per person (pid) and survey year (syear); only rows
* present in both files are kept
merge 1:1 pid syear using "$data\raw\SOEP\pgen.dta"
tabulate _merge
keep if _merge == 3
drop _merge

* pequiv: same key, same matched-only rule
merge 1:1 pid syear using "$data\raw\SOEP\pequiv.dta"
tabulate _merge
keep if _merge == 3
drop _merge


* Snapshot of the merged file, written before any value is recoded
save "$data\temp\SOEP2005_2011.dta", replace


* Convert the numeric codes -1 ... -5, -8 and 9999 to Stata missing values
* in every variable, one rule at a time.
* NOTE(review): the 9999 rule is applied to _all, so a genuine value of
* exactly 9999 in any variable would also be set to missing - confirm
* that this is intended.
foreach rule in -1=.a -2=.b -3=.c -4=.d -5=.e -8=. 9999=. {
	mvdecode _all, mv(`rule')
}
describe


***************
****pl data****
***************

* Overview of the pl items used for the utilization and insurance measures
summarize ple0053 ple0055 ple0056 ple0072 ple0073 ple0096 ple0097 ple0098 ple0102 ple0107 ple0108 ple0109 ple0136 ple0113 ple0138 ple0140 ple0141


* Sex: female is 1 when pla0009 equals 2 and 0 in every other case
tabulate pla0009
generate female = (pla0009 == 2)
tabulate female, missing

* Birth year and age (survey year minus birth year)
tabulate ple0010, missing
drop if ple0010 < 0
generate age = syear - ple0010
tabulate age
* Observations with a computed age of 16 are assigned age 17
replace age = 17 if age == 16

* Occupational status: civil servant when plb0065 lies strictly between 0 and 5
tabulate plb0065, missing
generate civilservant = (plb0065 > 0 & plb0065 < 5)

* Inspect every item that enters the self-employment indicator
foreach item of varlist plb0004 plb0008 plb0057 plg0255 plb0586 {
	tabulate `item'
}
tabulate plb0059, missing
foreach item of varlist plb0047 plb0060 plb0061 {
	tabulate `item'
}

* Self-employed when at least one of the items falls in its qualifying range;
* a missing value never qualifies because each range has an upper bound
generate selfemployed = (plb0004 > 0 & plb0004 < 6) ///
	| (plb0008 == 1)                                 ///
	| (plb0057 > 0 & plb0057 < 7)                    ///
	| (plg0255 == 1)                                 ///
	| (plb0586 > 0 & plb0586 < 7)                    ///
	| (plb0059 > 0 & plb0059 < 7)                    ///
	| (plb0047 > 0 & plb0047 < 7)                    ///
	| (plb0060 > 0 & plb0060 < 4)                    ///
	| (plb0061 > 0 & plb0061 < 4)

* White collar when plb0064 lies strictly between 0 and 8
tabulate plb0064, missing
generate whitecollar = (plb0064 > 0 & plb0064 < 8)

* Original author's remark: white collar is somehow not included in the data,
* so it is created as the remaining category

* Yearly means of the three occupation indicators
sort syear
foreach flag of varlist selfemployed civilservant whitecollar {
	by syear: summarize `flag'
}


*Utilization and Insurance~*
*~~~~~~~~~~~~~~~~~~~~~~~~~~*

*Utilization

* Any hospital stay: 1 when ple0053 equals 1, otherwise 0
tabulate ple0053, missing
generate hospitalstay = (ple0053 == 1)

* Number of hospital stays, copied from ple0055
tabulate ple0055
generate hospitalstays = ple0055


* Hospital nights: ple0056 with missing values (and any remaining
* negative code) set to zero
tabulate ple0056
recode ple0056 (missing = 0), generate(hospitalnights)
replace hospitalnights = 0 if ple0056 < 0

*

* Doctor visits: ple0072, forced to zero whenever ple0073 equals 1
tabulate ple0072
tabulate ple0073
generate doctorvisits = ple0072
replace doctorvisits = 0 if ple0073 == 1
tabulate doctorvisits, missing


*Insurance

tabulate ple0096
tabulate ple0097
tabulate ple0098

* Both insurance flags are 1 for the matching ple0097 code and missing
* (not zero) for everyone else
generate PrivateHI = 1 if ple0097 == 2
sort syear
by syear: summarize PrivateHI

generate SHI = 1 if ple0097 == 1
by syear: summarize SHI

*Note (Nicolas, September 5, 2018):
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*There are two questions, and hence two variables, with information about the privately insured. In the old questionnaire, there was first a filter question about whether someone was privately or publicly insured.
*Respondents then skipped some questions and were asked about Beihilfe before being asked about private insurance again.

*Variable names in SOEPinfo are OP6203 (1998) -- which has just a 1 for privately insured -- and PP90 (1999) with 1: publicly, 2: privately, 3: none.

*Long story short: I thought I had a bug in the coding because, when working cross-sectionally, you have to code it differently (as I remembered from previous papers). But when using the long dataset
*and the variables ple0096 (only until 2001) and, more importantly, ple0097 (all years), the issue has been corrected. Just using ple0097==2 is fine. All these observations are 1s on ple0096.
*The fact is that the privately insured sample of non-civil servants and non-self-employed increases from 286 in 1998 to 569 in 1999 and 1203 in 2000 due to refreshment samples.

*

* Role within a private policy, taken from ple0102
* (flags are 1 or missing, never 0)
tabulate ple0102
generate PrivateHIpolicyholder   = 1 if ple0102 == 2
generate PrivateHIfamilyenrollee = 1 if ple0102 == 1

* Family enrollee flag from ple0099 (code 3)
tabulate ple0099, missing
generate SHIfamilyenrollee = 1 if ple0099 == 3
by syear: summarize SHIfamilyenrollee

sort syear
by syear: summarize PrivateHIpolicyholder
by syear: summarize PrivateHIfamilyenrollee

*
* Deductible of the private policy, copied from ple0107
tabulate ple0107
tabulate ple0108
tabulate ple0109
tabulate ple0113
generate PrivateHIdeductible = ple0107

*

* Premium of the private policy, copied from ple0136
tabulate ple0136
generate PrivateHIpremium = ple0136

*
* Single coverage flag (ple0138 equals 1) and number of covered persons (ple0139)
tabulate ple0138
tabulate ple0139, missing
generate PrivateHISingleCoverage = 1 if ple0138 == 1
generate PrivateHINoCovered      = ple0139

*all years available
sort syear
by syear: summarize PrivateHISingleCoverage
by syear: summarize PrivateHIpolicyholder



*****************
****pgen data****
*****************

* Currently unemployed: 1 when pglfs equals 6; remaining observations are
* set to 0 after the intermediate tabulation
tabulate pglfs, missing
generate CurrentlyUnemployed = 1 if pglfs == 6
tabulate CurrentlyUnemployed, missing
replace CurrentlyUnemployed = 0 if CurrentlyUnemployed == .

* Public sector: 1 when pgoeffd equals 1, otherwise 0
tabulate pgoeffd
generate PublicSector = (pgoeffd == 1)
sort syear
by syear: summarize PublicSector

*

* Employment status indicators derived from pgemplst
tabulate pgemplst
generate fulltimeemployed = (pgemplst == 1)

generate parttimeemployed = inlist(pgemplst, 2, 4)

generate jobtraining = (pgemplst == 3)

generate nonemployed = (pgemplst == 5)

* Schooling indicators derived from pgpsbil: the listed codes become 1, every
* other non-missing code becomes 0, and missing values stay missing
tabulate pgpsbil
recode pgpsbil (6 7 8 = 1) (nonmissing = 0), generate(noschool)
recode pgpsbil (6 = 1)     (nonmissing = 0), generate(dropout)
recode pgpsbil (1 = 1)     (nonmissing = 0), generate(Hauptschule)
recode pgpsbil (2 = 1)     (nonmissing = 0), generate(Realschule)
recode pgpsbil (3 = 1)     (nonmissing = 0), generate(Fachhochschule)
recode pgpsbil (4 = 1)     (nonmissing = 0), generate(Abi)

sort syear
by syear: summarize noschool

tabulate pglfs, missing

*

* Gross and net labor earnings, copied from pglabgro and pglabnet
generate labgro = pglabgro
generate labnet = pglabnet
sort syear
by syear: summarize labgro
by syear: summarize labnet


*******************
****pequiv data****
*******************

*Combined old-age, disability, etc. pensions IOLDY$$ 99
*Combined widows and orphans pension IWIDY$$ 100
*Combined company pension ICOMP$$ 101
*Combined private pension IPRVP$$ 102


* Set the "-2" code to zero for every variable from ijob1 through igrv2.
* The mvdecode step earlier in this script has already turned -2 into .b,
* so a test against -2 alone would never match; both forms are checked here.
foreach inc of varlist ijob1-igrv2 {
	replace `inc' = 0 if `inc' == -2 | `inc' == .b
}

* Individual total income: sum of the strictly positive income components.
* Stata treats missing values as larger than any number, so "> 0" on its own
* is also true for missing values and would turn the running total into
* missing; missing components are therefore excluded explicitly.
generate o_indtotal = 0
foreach inc of varlist i11110 ioldy iwidy iunby iunay isuby imaty istuy iprvp ialim ielse icomp {
	replace o_indtotal = o_indtotal + `inc' if `inc' > 0 & !missing(`inc')
}
summarize o_indtotal
sort syear
by syear: summarize o_indtotal

*Income (annual pre- and post-government income, each PLUS IR) *
*OECD EQUIVALENCE SCALE

* Weights: 1 for the first person, 0.5 for each of the (d11106 - 1 - h11101)
* further persons and 0.3 for each of the h11101 persons.
* NOTE(review): presumably d11106 is household size and h11101 the number of
* children in the household - confirm against the pequiv codebook.
generate oecd = 1 + ((d11106 - 1 - h11101) * 0.5) + (h11101 * 0.3)
tab2 oecd d11106
* Equivalized income: (i11102 + i11105) divided by the scale
generate o_eqpost = (i11102 + i11105) / oecd
summarize o_eqpost
by syear: summarize o_eqpost

*save "$data\temp\SOEP1984_2016.dta", replace


*(now use Martin's conversion: not any more)
*(scalar exchr=1.329165

* Price deflator by survey year with 2016 = 1. The values below are listed in
* calendar order for 1984, 1985, ..., 2016; years outside that range keep
* the initial value of 1.
generate deflator = 1
local yr = 1984
foreach d in 0.5750 0.5869 0.5862 0.5876 0.5951 0.6117 0.6282 0.6536 0.6866 0.7174 0.7367 ///
             0.7492 0.7601 0.7748 0.7819 0.7865 0.7978 0.8136 0.8252 0.8337 0.8476 0.8607 ///
             0.8743 0.8944 0.9179 0.9208 0.9309 0.9503 0.9694 0.9839 0.9929 0.9952 1.0000 {
	replace deflator = `d' if syear == `yr'
	local ++yr
}

tabulate deflator, missing



*****NOW INFLATE INCOME VARIABLES TO 2016 Dollars**********
*Average EUR-USD exchange in 2016 was 1.11 according to https://www.oanda.com/currency/average

* Each income variable is divided by the deflator and multiplied by the
* EUR-USD rate of 1.11; the result is stored under the original name plus
* the suffix II
foreach money in labgro labnet o_indtotal o_eqpost {
	generate `money'II = (`money' / deflator) * 1.11
}



***Sample Selection***

* 1) Remove observations without a computed age
drop if age == .

* 2) Report per year, then remove, observations whose schooling indicator
*    carries the .a missing code
sort syear
by syear: count if dropout == .a
drop if dropout == .a

* 3) Report overall and per year, then remove, observations with missing
*    individual total income
by syear: summarize o_indtotal
count if o_indtotal == .
by syear: count if o_indtotal == .

drop if o_indtotal == .

* Remaining observations per survey year
tabulate syear, missing

* Reduce the file to identifiers, weights and the analysis variables.
* o_eqpostII has to be part of this list: it is labelled further below, and
* labelling a variable that has been dropped stops the do-file with a
* "variable not found" error before the final save is reached.
keep cid syear hid pid pnr w11101 w11102 w11103 w11105                        ///
	female age civilservant selfemployed whitecollar                           ///
	hospitalstay hospitalnights doctorvisits                                   ///
	PrivateHI PrivateHIfamilyenrollee PrivateHIdeductible                      ///
	PrivateHISingleCoverage PrivateHINoCovered                                 ///
	PublicSector fulltimeemployed parttimeemployed jobtraining                 ///
	noschool dropout Hauptschule Realschule Fachhochschule Abi                 ///
	labgro labnet o_indtotal oecd o_eqpost deflator                            ///
	labgroII labnetII o_indtotalII o_eqpostII



* Identifiers and income variables
label variable cid "Enrollee ID"
label variable syear "Year"
label variable female "Female"
label variable labgro "Self-reported individual gross wages in euro of given year, SOEP"
label variable labnet "Self-reported individual net wages in euro of given year, SOEP"
label variable o_indtotal "Individual total income in euro of given year, SOEP"
label variable o_eqpost "Equivalized post-tax post-transfer income in euro of given year, SOEP"
*label variable premium "Self-reported monthly premium in euro of given year, original SOEP respondents"
*label variable state "State of residencs, SOEP respondents"

* Scale, deflator and inflated income variables; each II label names the
* un-suffixed variable it is derived from
label variable oecd "OECD equivalent score (considering household members)"
label variable deflator "Deflator to inflate moneary euro values to 2016 dollars"
label variable labgroII "Labgro but inflated to 2016 dollars"
label variable labnetII "Labnet but inflated to 2016 dollars"
label variable o_eqpostII "o_eqpost but inflated to 2016 dollars"
label variable o_indtotalII "o_indtotal but inflated to 2016 dollars"

* Schooling
label variable noschool "No school degree"
label variable dropout "Dropout of high school"
label variable Hauptschule "Degree after 8/9 years of schooling"
label variable Realschule "Degree after 10 years of schooling"
label variable Fachhochschule "Degree after 12 years of schooling"
label variable Abi "Degree after 13 years of schooling"

* Demographics, occupation, utilization, insurance and employment
label variable age "Age of respondent"
label variable civilservant "Binary, 1 if currently employed as civil servant"
label variable selfemployed "Binary, 1 if currently self-employed"
label variable whitecollar "Binary, 1 if currently employed as white collar"
label variable hospitalstay "Binary, 1 if hospital stay in past calendar year"
label variable hospitalnights "Number of hospital nights in past calendar year"
label variable doctorvisits "Number of doctor visist in past 3 months (interviews in first quarter of year)"
label variable PrivateHI "If privately insured"
label variable PrivateHIfamilyenrollee "If private insured through family coverage"
label variable PrivateHIdeductible "Deductible of private insurance policy (not asked before 1997)"
label variable PrivateHISingleCoverage "If private insurance is single coverage"
label variable PrivateHINoCovered "Number of people covered under policy"
label variable PublicSector  "Binary, 1 if currently employed in public sector"
label variable fulltimeemployed "Binary, 1 if currently full-time employed "
label variable parttimeemployed  "Binary, 1 if currently part-time employed"
label variable jobtraining "Binary, 1 if currently in job training"

***SUMMARY STATISTIC***
/*
global xvar "female age civilservant selfemployed whitecollar hospitalnights doctorvisits PrivateHI PrivateHIdeductible fulltimeemployed parttimeemployed noschool dropout Hauptschule Realschule Abi labgroII labnetII o_indtotalII o_eqpostII" 

sum $xvar
sutex $xvar, nobs labels minmax digits(4) title(Descriptive Statistics) key(DesStat) longtable file("DesStat.tex") replace
*/

* Write the final analysis file
save "$data\temp\SOEP1984_2016_short.dta", replace




